package org.shj.spark.dataframe;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class DataFrameCreate {

	public static void main(String[] args) {
		
		SparkSession ss = SparkSession.builder()
				.master("local")
				.appName("DataFrameCreate")
				.getOrCreate();
		ss.sparkContext().setLogLevel("WARN"); // keep the console output readable
		// Spark's JSON reader expects one JSON object per line (JSON Lines),
		// regardless of the file extension.
		Dataset<Row> df = ss.read().json("src/main/resources/students.txt");
		
		df = df.orderBy("age"); // sort by age, ascending
		df.show(); // print this table
		System.out.println("==========");
		
		df.printSchema(); // print the schema metadata (column names and types)
		System.out.println("==========");
		
		df.select("name").show(); // select the name column and display it
		System.out.println("==========");
		
		df.filter(df.col("age").geq(18)).show(); // keep rows where age >= 18
		System.out.println("==========");

		// select() does not change the number of rows, so this counts all rows
		long cnt = df.select(df.col("age")).count();
		System.out.println(cnt);
		System.out.println("==========");
		
		ss.stop();
	}

}
